library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(forcats)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(p8105.datasets)
## 
## Attaching package: 'p8105.datasets'
## The following object is masked _by_ '.GlobalEnv':
## 
##     instacart
library(patchwork)
# Load the `instacart` dataset into the workspace and print a preview of it.
data(list = "instacart")
instacart
## # A tibble: 1,384,617 x 15
##    order_id product_id add_to_cart_order reordered user_id eval_set order_number
##       <int>      <int>             <int>     <int>   <int> <chr>           <int>
##  1        1      49302                 1         1  112108 train               4
##  2        1      11109                 2         1  112108 train               4
##  3        1      10246                 3         0  112108 train               4
##  4        1      49683                 4         0  112108 train               4
##  5        1      43633                 5         1  112108 train               4
##  6        1      13176                 6         0  112108 train               4
##  7        1      47209                 7         0  112108 train               4
##  8        1      22035                 8         1  112108 train               4
##  9       36      39612                 1         0   79431 train              23
## 10       36      19660                 2         1   79431 train              23
## # ... with 1,384,607 more rows, and 8 more variables: order_dow <int>,
## #   order_hour_of_day <int>, days_since_prior_order <int>, product_name <chr>,
## #   aisle_id <int>, department_id <int>, aisle <chr>, department <chr>
# Restrict the data to the produce department and keep only the columns used
# by the plots; `aisle` is converted to a factor so it can be reordered with
# forcats later on.
# NOTE: this overwrites the `instacart` object loaded by data(), so the full
# table is no longer available under that name after this step.
instacart <-
  instacart %>%
  filter(department == "produce") %>%
  select(aisle, product_name, order_hour_of_day, department, order_dow) %>%
  mutate(aisle = factor(aisle))

instacart
## # A tibble: 409,087 x 5
##    aisle                      product_name order_hour_of_d~ department order_dow
##    <fct>                      <chr>                   <int> <chr>          <int>
##  1 fresh vegetables           Organic Cel~               10 produce            4
##  2 fresh vegetables           Cucumber Ki~               10 produce            4
##  3 fresh fruits               Bag of Orga~               10 produce            4
##  4 fresh fruits               Organic Has~               10 produce            4
##  5 packaged vegetables fruits Super Green~               18 produce            6
##  6 fresh vegetables           Organic Gar~               18 produce            6
##  7 fresh vegetables           Asparagus                  18 produce            6
##  8 packaged vegetables fruits Organic Bio~               16 produce            6
##  9 packaged vegetables fruits Organic Bab~               16 produce            6
## 10 fresh vegetables           Organic Hot~               16 produce            6
## # ... with 409,077 more rows
# Bar chart of the number of rows (ordered items) per aisle, with the aisles
# arranged from the smallest count to the largest.
instacart %>%
  count(aisle, name = "n") %>%
  mutate(aisle = fct_reorder(.f = aisle, .x = n)) %>%
  plot_ly(type = "bar", x = ~aisle, y = ~n, color = ~aisle)
# Box plots of the order hour for each aisle; aisles are arranged by
# fct_reorder()'s default summary (the median) of order_hour_of_day.
instacart %>%
  mutate(aisle = fct_reorder(.f = aisle, .x = order_hour_of_day)) %>%
  plot_ly(type = "box", x = ~aisle, y = ~order_hour_of_day, color = ~aisle)
# Line chart of the mean order hour for each aisle across days of the week.
# summarise() collapses the data to one row per aisle/day pair, so each line is
# drawn from the group means only, rather than from every order row carrying a
# repeated copy of its group's mean (which is what mutate() produced).
# `.groups = "drop"` returns an ungrouped tibble, so no ungroup() is needed.
instacart %>%
  group_by(aisle, order_dow) %>%
  summarise(
    mean_order_hour = mean(order_hour_of_day),
    .groups = "drop"
  ) %>%
  plot_ly(x = ~order_dow, y = ~mean_order_hour, color = ~aisle) %>%
  add_lines()